# Look for hourly patterns.
# Work on a copy of the raw trips with cleaned-up column names.
hourly_data <- coc %>% clean_names()
# na.omit(hourly_data)
# summary(hourly_data)
# Hour component of each trip start, parsed from a 12-hour AM/PM timestamp.
hourly_data$hour <- hourly_data$trip_start_timestamp %>%
  as.POSIXlt(format = "%m/%d/%Y %I:%M:%S %p") %>%
  hour()
# Trips with a positive tip. Taken before isTipper is added, so `tips` does
# not carry that column.
tips <- subset(hourly_data, tip > 0)
# Flag the tipping trips; rows whose tip is missing keep the FALSE default.
hourly_data$isTipper <- FALSE
hourly_data$isTipper[which(hourly_data$tip > 0)] <- TRUE
# Two-row summary for the pie chart: trips with a tip versus trips without.
# The counts are taken as numbers with nrow() instead of being converted to
# strings and parsed back, and the column is named `value` from the start so
# nothing relies on `$` partially matching a differently named column.
my_pie <- data.frame(
  group = c("Tippers", "NoTips"),
  value = as.numeric(c(nrow(tips), nrow(hourly_data) - nrow(tips)))
)
head(my_pie)
## group value
## 1 Tippers 9961
## 2 NoTips 42039
# Earlier, simpler pie chart kept for reference:
# ggplot(my_pie, aes(x = "", y = value, fill = group)) +
#   geom_bar(stat = "identity", width = 1, color = "white") +
#   coord_polar("y", start = 0) +
#   theme_void()

# Add the label text and the angular position of each slice's label.
df <- my_pie %>%
  mutate(
    # Factor levels need to be the opposite order of the cumulative sum of the
    # values: the stacked bar places the first level last, so reversing the
    # row order puts each midpoint inside its own slice. Setting the levels
    # explicitly keeps the labels aligned whatever the group names are.
    group = factor(group, levels = rev(as.character(group))),
    cumulative = cumsum(value),
    midpoint = cumulative - value / 2,
    label = paste0(group, " ", round(value / sum(value) * 100, 1), "%")
  )

# A single stacked bar drawn in polar coordinates is the pie; the labels sit
# just outside the bar's centre line (x = 1.3) at each slice's midpoint.
ggplot(df, aes(x = 1, weight = value, fill = group)) +
  geom_bar(width = 1, position = "stack") +
  coord_polar(theta = "y") +
  geom_text(aes(x = 1.3, y = midpoint, label = label)) +
  theme(
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    plot.caption = element_text(hjust = 0.5)
  ) +
  ggtitle("Percentage of Riders Who Tip")
# Base-graphics alternative to the ggplot pie chart:
# pie(my_pie$value, my_pie$group)

# Share of trips that include a tip. Both counts come from count(), so the
# ratio prints as a one-row table with a single column `n`.
tipping_trips <- count(tips)
all_trips <- count(hourly_data)
percTip <- tipping_trips / all_trips
percTip
## n
## 1 0.1915577
# Exploratory plots. Columns are referenced by bare name inside aes() so that
# ggplot2 looks them up in the plot's own data; writing `data$column` there
# bypasses that lookup and is discouraged by ggplot2.

# Tip against fare for every trip.
ggplot(hourly_data, aes(x = fare, y = tip)) +
  geom_point()
# Histogram of tip amounts, without the zero tips.
ggplot(tips, aes(x = tip)) +
  geom_histogram(bins = 24)
# Box plot of tip for each value of shared_trip_authorized.
ggplot(hourly_data, aes(x = shared_trip_authorized, y = tip)) +
  geom_boxplot()
# Histogram of the hour column.
ggplot(hourly_data, aes(x = hour)) +
  geom_histogram(bins = 24)
# ggplot(hourly_data, aes(x = trip_start_timestamp, y = trip_total)) + geom_col()
Now let’s look at all the pick-up locations:
# Base road map centred on Chicago (41.8781 N, 87.6298 W).
p <- get_googlemap(
  center = c(lon = -87.629800, lat = 41.878100),
  zoom = 11,
  scale = 2,
  maptype = "roadmap",
  color = "color",
  key = myKey
) %>%
  ggmap()
## Source : https://maps.googleapis.com/maps/api/staticmap?center=41.8781,-87.6298&zoom=11&size=640x640&scale=2&maptype=roadmap&key=xxx
# Overlay every pick-up centroid from the raw data on the base map.
p +
  geom_point(
    data = coc,
    mapping = aes(x = Pickup.Centroid.Longitude, y = Pickup.Centroid.Latitude),
    size = 0.7
  ) +
  theme(legend.position = "bottom")
## Warning: Removed 5918 rows containing missing values (geom_point).
Now let’s look at all the drop-off locations:
# Base terrain map with the same centre as the pick-up map.
d <- get_googlemap(
  center = c(lon = -87.629800, lat = 41.878100),
  zoom = 11,
  scale = 2,
  maptype = "terrain",
  color = "color",
  key = myKey
) %>%
  ggmap()
## Source : https://maps.googleapis.com/maps/api/staticmap?center=41.8781,-87.6298&zoom=11&size=640x640&scale=2&maptype=terrain&key=xxx
# Overlay every drop-off centroid from the raw data on the base map.
d +
  geom_point(
    data = coc,
    mapping = aes(x = Dropoff.Centroid.Longitude, y = Dropoff.Centroid.Latitude),
    size = 0.7
  ) +
  theme(legend.position = "bottom")
## Warning: Removed 6710 rows containing missing values (geom_point).
######### REPEAT ABOVE WITH ZOOM #########
### Pick-Up Density
Now let’s look at all the pick-up locations:
# Closer terrain base map (zoom 12) centred on Chicago (41.8781 N, 87.6298 W).
p <- get_googlemap(
  center = c(lon = -87.629800, lat = 41.878100),
  zoom = 12,
  scale = 2,
  maptype = "terrain",
  color = "color",
  key = myKey
) %>%
  ggmap()
## Source : https://maps.googleapis.com/maps/api/staticmap?center=41.8781,-87.6298&zoom=12&size=640x640&scale=2&maptype=terrain&key=xxx
# Overlay the pick-up centroids on the zoomed-in map.
p +
  geom_point(
    data = coc,
    mapping = aes(x = Pickup.Centroid.Longitude, y = Pickup.Centroid.Latitude),
    size = 0.7
  ) +
  theme(legend.position = "bottom")
## Warning: Removed 15497 rows containing missing values (geom_point).
Now let’s look at all the drop-off locations:
# Closer terrain base map for the drop-offs.
# NOTE(review): zoom is 14 here but 12 for the zoomed pick-up map; confirm the
# difference is intentional.
d <- get_googlemap(
  center = c(lon = -87.629800, lat = 41.878100),
  zoom = 14,
  scale = 2,
  maptype = "terrain",
  color = "color",
  key = myKey
) %>%
  ggmap()
## Source : https://maps.googleapis.com/maps/api/staticmap?center=41.8781,-87.6298&zoom=14&size=640x640&scale=2&maptype=terrain&key=xxx
# Overlay the drop-off centroids on the zoomed-in map.
d +
  geom_point(
    data = coc,
    mapping = aes(x = Dropoff.Centroid.Longitude, y = Dropoff.Centroid.Latitude),
    size = 0.7
  ) +
  theme(legend.position = "bottom")
## Warning: Removed 38094 rows containing missing values (geom_point).
It is a bit tricky to see the density of the pickups and dropoffs because the points sit on top of each other. The plot below sets the alpha parameter, which makes the dots transparent and helps reveal the density of the plotted points.
# Pick-up centroids drawn as small, mostly transparent dark-blue dots so that
# overlapping points build up into darker areas.
p +
  geom_point(
    data = coc,
    mapping = aes(x = Pickup.Centroid.Longitude, y = Pickup.Centroid.Latitude),
    colour = "#011f4b",
    alpha = 0.25,
    size = 0.5
  ) +
  theme(legend.position = "none")
## Warning: Removed 15497 rows containing missing values (geom_point).
# Filled 2-D density of the pick-up centroids on top of the base map.
# alpha is a fixed setting, so it is passed outside aes(). Inside aes() it is
# treated as a mapped variable: the alpha scale then chooses the opacity
# instead of using 0.15, and a spurious alpha legend entry is added.
p +
  stat_density2d(
    aes(
      x = Pickup.Centroid.Longitude,
      y = Pickup.Centroid.Latitude,
      fill = ..level..
    ),
    data = coc,
    geom = "polygon",
    alpha = 0.15,
    size = 0.01,
    bins = 30
  ) # +
## Warning: Removed 15497 rows containing non-finite values (stat_density2d).
# Disabled follow-on layers (they would attach to the trailing `+` above):
#geom_point(aes(x = x, y = y, stroke = 2), colour=col4, data = n, size =1.5) +
#geom_label_repel(aes(x, y, label = label), data=n, family = 'Times', size = 3, box.padding = 0.2, point.padding = 0.3, segment.color = 'grey50')
# Filled 2-D density of the pick-up centroids with contour lines drawn on top.
# alpha is a fixed setting, so it is passed outside aes(). Inside aes() it is
# treated as a mapped variable: the alpha scale then chooses the opacity
# instead of using 0.25, and a spurious alpha legend entry is added.
p +
  stat_density2d(
    aes(
      x = Pickup.Centroid.Longitude,
      y = Pickup.Centroid.Latitude,
      fill = ..level..
    ),
    data = coc,
    geom = "polygon",
    alpha = 0.25,
    size = 0.1,
    bins = 40
  ) +
  geom_density2d(
    data = coc,
    aes(x = Pickup.Centroid.Longitude, y = Pickup.Centroid.Latitude),
    size = 0.3
  )
## Warning: Removed 15497 rows containing non-finite values (stat_density2d).
## Warning: Removed 15497 rows containing non-finite values (stat_density2d).
#END